Quick ggplot2 grammar overview

ggplot2 graphics are built from individual layers. Each layer is defined by five components:

  • data: Data in the form of a data.frame
  • aesthetic mappings: Rules for how the variables in the data are mapped to the visual properties (aesthetics) of the plot, such as position, color, and shape
  • geom: Plot type (scatterplot, boxplot, …). geom_ functions are shortcuts for creating a layer
  • stat: Possible transformation of the data before plotting
  • position adjustment: Spatial arrangement of the graphic elements

Formatting the data

# Load the indicator data and the table describing each indicator.
wdi <- readRDS(file = "wdi_daten.rds")
wdi_info <- readRDS(file = "wdi_info.rds")

# Rows whose Jahr equals 2013.
wdi13 <- wdi[wdi[["Jahr"]] == 2013, ]
# Rows whose Region is "Western Europe".
wdi_westeuropa <- wdi[wdi[["Region"]] == "Western Europe", ]

# Show the first rows of the full data set.
head(x = wdi)
##   Isocode                 Land Jahr     Bev    AL KSL    ZS  ZE Kontinent
## 1      AD              Andorra 2010   84419    NA 3.3 100.0 100    Europe
## 2      AD              Andorra 2011   82326    NA 3.2 100.0 100    Europe
## 3      AD              Andorra 2012   79316    NA 3.1 100.0 100    Europe
## 4      AD              Andorra 2013   75902    NA 3.0 100.0 100    Europe
## 5      AE United Arab Emirates 2010 8329453 4.171 8.5  97.5 100      Asia
## 6      AE United Arab Emirates 2011 8734722 4.128 8.2  97.5 100      Asia
##            Region  log_BIP  Bev_kat AL_kat
## 1 Southern Europe 21.93119  0-1 Mio   <NA>
## 2 Southern Europe 21.95496  0-1 Mio   <NA>
## 3 Southern Europe 21.86945  0-1 Mio   <NA>
## 4 Southern Europe 21.90159  0-1 Mio   <NA>
## 5    Western Asia 26.37943 1-10 Mio   0-5%
## 6    Western Asia 26.57698 1-10 Mio   0-5%
wdi_info
##    WDI_indicator                                                         Info
## 1 NY.GDP.MKTP.CD                                            GDP (current US$)
## 2    SP.POP.TOTL                                            Population, total
## 3 SL.UEM.TOTL.ZS                 Unemployment, total (% of total labor force)
## 4    SH.DYN.MORT              Mortality rate, under-5 (per 1,000 live births)
## 5    SH.STA.ACSN Improved sanitation facilities (% of population with access)
## 6 EG.ELC.ACCS.ZS                      Access to electricity (% of population)
##   Variablenname
## 1           BIP
## 2           Bev
## 3            AL
## 4           KSL
## 5            ZS
## 6            ZE
# Read the GDP table; read.csv2() defaults to ";" as field separator and
# "," as decimal mark.
gdp <- read.csv2(file = "GDP.csv")
# Show the first ten rows.
head(gdp, n = 10)
##     X Rank Country      GDP
## 1   5    1     USA 18036648
## 2   6    2   China 11064665
## 3   7    3   Japan  4383076
## 4   8    4 Germany  3363447
## 5   9    5      UK  2861091
## 6  10    6  France  2418836
## 7  11    7   India  2088841
## 8  12    8   Italy  1821497
## 9  13    9  Brazil  1803653
## 10 14   10  Canada  1552808

Basic Examples

Scatterplot

# Base plot: ZS on the x axis, KSL on the y axis.
g <- ggplot(data = wdi13, mapping = aes(x = ZS, y = KSL))
# Define the layer via the geom_point() shortcut
g + geom_point()
## Warning: Removed 32 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Alternative: define the same layer by hand with layer()
g + layer(
  geom = "point",
  stat = "identity",
  position = "identity"
)
## Warning: Removed 32 rows containing missing values or values outside the scale range
## (`geom_point()`).

Bar graph

# Base plot: Kontinent on the x axis, bars filled by AL_kat; rows with a
# missing AL_kat are dropped first.
g <- ggplot(
  data = wdi13[!is.na(wdi13[["AL_kat"]]), ],
  mapping = aes(x = Kontinent, fill = AL_kat)
)

# Bar chart with geom_bar()'s default position adjustment
g + geom_bar()

# Define the layer via the geom_bar() shortcut, using position "fill"
g + geom_bar(position = "fill")

# Alternative: define the same layer by hand with layer()
g + layer(
  geom = "bar",
  stat = "count",
  position = "fill"
)

Histogram

# Base plot: AL on the x axis.
g <- ggplot(data = wdi13, mapping = aes(x = AL))
# Define the layer via the geom_histogram() shortcut
g + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 30 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Alternative: define the same layer by hand with layer()
g + layer(
  geom = "bar",
  stat = "bin",
  position = "stack"
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 30 rows containing non-finite outside the scale range
## (`stat_bin()`).

Several layers

Aesthetic mapping

# Map Jahr to x, AL to y, and Land to the point color.
g <- ggplot(
  data = wdi_westeuropa,
  mapping = aes(x = Jahr, y = AL, color = Land)
)
# Draw the mapped data as points.
g + geom_point()
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_point()`).

geom

# Same mapping as before: Jahr on x, AL on y, color by Land.
g <- ggplot(
  data = wdi_westeuropa,
  mapping = aes(x = Jahr, y = AL, color = Land)
)
# Line layer; the group aesthetic connects observations that share a Land.
g + geom_line(mapping = aes(group = Land))
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Two layers on one plot: points plus the per-Land lines.
g +
  geom_point() +
  geom_line(mapping = aes(group = Land))
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Boxplot of AL with a constant x value ("").
ggplot(data = wdi_westeuropa, mapping = aes(x = "", y = AL)) +
  geom_boxplot()
## Warning: Removed 8 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Boxplots of AL with Jahr on the x axis.
# NOTE(review): this gives one box per year only if Jahr is discrete
# (e.g. a factor) -- confirm the column type.
ggplot(data = wdi_westeuropa, mapping = aes(x = Jahr, y = AL)) +
  geom_boxplot()
## Warning: Removed 8 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

position

# Base plot: Kontinent on the x axis, bars filled by AL_kat; rows with a
# missing AL_kat are dropped first.
g <- ggplot(
  data = wdi13[!is.na(wdi13[["AL_kat"]]), ],
  mapping = aes(x = Kontinent, fill = AL_kat)
)
# "stack": segments are stacked on top of each other
g + geom_bar(position = "stack")

# "fill": stacked, with every bar scaled to the same total height
g + geom_bar(position = "fill")

# "dodge": segments are placed side by side
g + geom_bar(position = "dodge")

Coord flip

# Stacked bar chart with the x and y axes swapped
g +
  geom_bar(position = "stack") +
  coord_flip()

Appearance

Legends

# Scatterplot of KSL against ZS, colored by Kontinent; the color mapping
# is what produces the legend customized below.
g <- ggplot(
  data = wdi13,
  mapping = aes(x = ZS, y = KSL, color = Kontinent)
) +
  geom_point()
g
## Warning: Removed 32 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Set the title of the color legend
g + guides(
  color = guide_legend(title = "Continent")
)
## Warning: Removed 32 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Legend position: place the legend below the plot
g + theme(
  legend.position = "bottom"
)
## Warning: Removed 32 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Place the legend inside the plotting area. Passing a numeric vector to
# `legend.position` is deprecated since ggplot2 3.5.0; instead set
# `legend.position = "inside"` and give the relative coordinates through
# `legend.position.inside`. `legend.justification = c(0, 1)` anchors the
# legend's top-left corner at that point.
g + theme(
  legend.justification = c(0, 1),
  legend.position = "inside",
  legend.position.inside = c(0.75, 0.95)
)
## Warning: Removed 32 rows containing missing values or values outside the scale range
## (`geom_point()`).

Colors

# Assign the colors of the color scale by hand
g + scale_color_manual(
  values = c("black", "red", "yellow", "green", "blue")
)
## Warning: Removed 32 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Use predefined ColorBrewer palettes (RColorBrewer)
g + scale_color_brewer(
  palette = "Set1"
)
## Warning: Removed 32 rows containing missing values or values outside the scale range
## (`geom_point()`).

g + scale_color_brewer(
  palette = "Set2"
)
## Warning: Removed 32 rows containing missing values or values outside the scale range
## (`geom_point()`).

Point shapes

# Fixed shape for all points. `g` already contains a geom_point() layer,
# so this adds a second point layer on top of it (hence two warnings).
g +
  geom_point(shape = 3)
## Warning: Removed 32 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 32 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Map the point shape to Kontinent and keep the plot for further changes.
g2 <- g +
  geom_point(mapping = aes(shape = Kontinent))
g2
## Warning: Removed 32 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 32 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Choose the shapes of the shape scale by hand
g2 + scale_shape_manual(
  values = c(3, 3, 3, 1, 1)
)
## Warning: Removed 32 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 32 rows containing missing values or values outside the scale range
## (`geom_point()`).

Facet grid

# One panel per Kontinent, arranged as columns
g + facet_grid(cols = vars(Kontinent))
## Warning: Removed 32 rows containing missing values or values outside the scale range
## (`geom_point()`).

# One panel per Kontinent, arranged as rows
g + facet_grid(rows = vars(Kontinent))
## Warning: Removed 32 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Grid of panels: Bev_kat in the rows, Kontinent in the columns
g + facet_grid(
  rows = vars(Bev_kat),
  cols = vars(Kontinent)
)
## Warning: Removed 32 rows containing missing values or values outside the scale range
## (`geom_point()`).